% Main Script: run_mc_R01_DGP02.m
%   Description:
%       This script produces the results of a Monte Carlo simulation, where the DGP is set as a
%       design with an additional covariate and heteroskedastic serially correlated errors
%   Output:
%       Table 8: Simulation results for the experiment in Section C.2

% ========================================
% Setting simulation environment and parameters
% ========================================
clear all;
% Randomization seed 
rng(12345)
% Number of cores being used in the parallel computation
parpool('local', 8)
% Number of replications
params.reps = 5000;
% True parameter
params.beta0 = 0;
% DGP with an additional covariate and heteroskedastic serially correlated errors
params.DGP_id = 2;
% Number of factors
params.R0 = 1;
params.R_arr = [1 2];
% Save file directory
params.folder_name = 'matlogs/R01_DGP02';
% Number of units and number of time periods
arr_N = 100;
arr_T = 50;
% Strength of factors
arr_kappa = [0:0.05:0.25 0.50 1.00]';

% ========================================
% Loop over all different combinations of N, T, and kappa
% ========================================
% Prepare arrays for outputs
res = cell(length(arr_N), length(arr_T), size(arr_kappa,1));
n_cases = length(arr_N) * length(arr_T) * size(arr_kappa,1);
i_case = 0;
for i_N = 1:length(arr_N)
    for i_T = 1:length(arr_T)
        for i_kappa = 1:size(arr_kappa,1)
            ticID_this_design = tic;
            params.N = arr_N(i_N);
            params.T = arr_T(i_T);
            params.kappa = arr_kappa(i_kappa,:);
            % Run the Monte Carlo simulation for the current parameters
            out = mc_weak_factors(params);
            res{i_N, i_T, i_kappa}.params = params;
            res{i_N, i_T, i_kappa}.LS.stats = out.LS.stats;
            res{i_N, i_T, i_kappa}.H.stats = out.H.stats;
            i_case = i_case + 1;
            fprintf(['Design %i/%i (N,T,kappa)=(%i, %i,' repmat(' %1.2f', 1, length(params.kappa)) ') took %1.2fs\n'], ...
                i_case, n_cases, params.N, params.T, params.kappa, toc(ticID_this_design));
        end
    end
end

% ========================================
% Construct oracle confidence intervals
% ========================================
% These oracle CIs are based on unknown design-specific least favorable critical values
LF_cv_LS = cell(size(res,1),size(res,2));
LF_cv = cell(size(res,1),size(res,2));
for i_N=1:size(res,1)
    for i_T=1:size(res,2)        
        for i_kappa=1:size(res,3)
            res_tmp = res{i_N,i_T,i_kappa};
            cv_LS_tmp(:,:,:,i_kappa) = res_tmp.LS.stats.cv;
            cv_tmp(:,:,i_kappa) = res_tmp.H.stats.cv;
        end
        % Maximize over kappa dimension
        LF_cv_LS{i_N,i_T} = max(cv_LS_tmp,[],4);
        LF_cv{i_N,i_T} = max(cv_tmp,[],3);
    end   
end

% Convert results to arrays
for i=1:numel(LF_cv_LS)
    LF_cv_LS_arr(:,:,:,i) = LF_cv_LS{i}; 
    LF_cv_arr(:,:,i) = LF_cv{i};
end
% Maximum across (N, T)
LF_cv_LS_max = max(LF_cv_LS_arr,[],4);
LF_cv_max = max(LF_cv_arr,[],3);


% ========================================
% Save the results
% ========================================
save(sprintf('%s/MC_merged_w_LFCV',params.folder_name),'res','LF_cv_LS','LF_cv_LS_max','LF_cv','LF_cv_max')
% Close parallel computation
if isunix
    delete(gcp('nocreate'));
end

% ========================================
% Output Table 8 as CSV file
% ========================================
if ~isfolder('tables')
    mkdir('tables')
end
% File list
folder_name = 'R01_DGP02';
fileinfo = dir(['matlogs/' folder_name '/*.mat']);
f_names = {fileinfo.name};
f_name_w_LFCV = f_names(contains(f_names,'w_LFCV'));
load(['matlogs/' folder_name '/' f_name_w_LFCV{:}]);
% Column names for output table
columnLabels = {'kappa', 'R'};
basic_estimators = {'LS','H'};
for i_b=1:length(basic_estimators)
    columnLabels = [columnLabels, {[basic_estimators{i_b} ' bias']}, {[basic_estimators{i_b} ' std']}, {[basic_estimators{i_b} ' rmse']}, ...
        {[basic_estimators{i_b} ' size']}, {[basic_estimators{i_b} ' len']}, {[basic_estimators{i_b} ' LFCV']}];
end
for i_N=1:length(arr_N)
    N = arr_N(i_N);
    tab_arr_R1 = [];
    tab_arr_R2 = [];
    for i_T=1:length(arr_T)
        T = arr_T(i_T);
        f_names_dgp = f_names(contains(f_names,sprintf('N%i_T%i',N,T)));
        for i_kappa = 1:length(arr_kappa)
            load(['matlogs/' folder_name '/' f_names_dgp{i_kappa}])
            if round(out.params.kappa,2) == round(arr_kappa(i_kappa),2)
                % Select those with bias correction for heteroskedasticity and serial correlation
                % LS.bias and and LS.rmse each contains six components, which are under
                %   (i & ii) no correction, (iii & iv) correcting bcorr2 & bcorr3, (v & vi)
                %       correcting bcorr1 & bcorr2 & bcorr3 in LS_factor.m
                % LS.std contains three components, which are under error assumption of
                %   (i & ii) homoscedasticity, (iii & iv) heteroscedasticity, (v & vi) serial
                %       correlation
                % LS.stats.rp and LS.stats.length are 3*6*3 arrays, where (2,3,3) and (2,4,3)
                %   represent that for these two estimates,
                %   (i) selecting alpha = 0.05 from [0.10 0.05 0.01]
                %   (ii) selecting beta estimate with correcting bcorr2 & bcorr3
                %       from [no_correction correcting_2_and_3 correcting_1_and_2_and_3]
                %   (iii) selecting se with serial_correlation assumption
                %       from [homoscedasticity  heteroscedasticity  serial_correlation]
                % H.stats elements all have two components represent two estimates
                tab_arr_R1 = [tab_arr_R1; ...
                    out.params.kappa, out.params.R_arr(1), ...
                    out.LS.stats.bias(3), out.LS.stats.std(3), out.LS.stats.rmse(3), out.LS.stats.rp(2,3,3)*100, out.LS.stats.length(2,3,3), mean(2*out.LS.se(:,3,3)*LF_cv_LS{i_N,i_T}(2,3,3)), ...                        
                    out.H.stats.bias(1), out.H.stats.std(1), out.H.stats.rmse(1), out.H.stats.size(1)*100, out.H.stats.length(1), mean(2*out.H.se(:,1)*LF_cv{i_N,i_T}(2,1))];
                tab_arr_R2 = [tab_arr_R2; ...
                    out.params.kappa, out.params.R_arr(2), ...
                    out.LS.stats.bias(4), out.LS.stats.std(4), out.LS.stats.rmse(4), out.LS.stats.rp(2,4,3)*100, out.LS.stats.length(2,4,3), mean(2*out.LS.se(:,4,3)*LF_cv_LS{i_N,i_T}(2,4,3)),  ...                        
                    out.H.stats.bias(2), out.H.stats.std(2), out.H.stats.rmse(2), out.H.stats.size(2)*100, out.H.stats.length(2), mean(2*out.H.se(:,2)*LF_cv{i_N,i_T}(2,2))];
            end
        end
    end
    tab_arr = [tab_arr_R1; tab_arr_R2];
    % Convert array to table
    tab8 = array2table(tab_arr, 'VariableNames', columnLabels);
end
% Output table
writetable(tab8, 'tables/table8.csv');
